
library(tidyverse)

# Collapse country-specific education answers into "Low" / "Medium" / "High".
#
# The country is taken from the FIRST value of `dataframe$country`; every row
# is then recoded with that single country's scheme, so the function expects
# one country per data frame.
#
# Arguments:
#   dataframe  A data frame (or tibble) with a `country` column and the
#              country's education question column (the `column` entry of the
#              scheme below, e.g. "Q22.2").
#
# Returns:
#   `dataframe` with the question column renamed to `education` and its values
#   replaced by "Low", "Medium" or "High". Answers found in none of the three
#   lists (including missing answers) become the string "NA" -- a literal
#   string, not a missing value. The data frame is returned unchanged when it
#   has no rows, when the first country is missing, or when the country has no
#   scheme.
#
# Errors when `country` is absent, or when neither the question column nor an
# `education` column exists for a supported country.
fn.education <- function(dataframe) {
  if (is.null(dataframe[["country"]])) {
    stop("`dataframe` must contain a `country` column.", call. = FALSE)
  }

  # `if` needs a single TRUE/FALSE; comparing the whole column is an error in
  # R >= 4.2 as soon as the data frame has more than one row.
  country <- as.character(dataframe[["country"]])[1]
  if (is.na(country)) {
    return(dataframe)
  }

  # One entry per supported country:
  #   column     question column holding the raw answer
  #   fallback   (optional) column used where `column` is missing
  #   first_char (optional) match on the first character of the answer only
  #   Low / Medium / High  raw answers belonging to each level
  schemes <- list(
    Australia = list(
      column = "Q22.4",
      Low = c("Year 9",
              "Year 8 or below",
              "Certificate II",
              "Certificate I",
              "No educational attainment"),
      Medium = c("Year 12",
                 "Year 11",
                 "Year 10",
                 "Certificate III & IV Level",
                 "Certificate IV",
                 "Certificate III",
                 "Advanced Diploma and Diploma Level",
                 "Advanced Diploma",
                 "Associate Degree",
                 "Diploma"),
      High = c("Doctoral Degree Level",
               "Higher Doctorate",
               "Professional Specialist Qualification, Doctoral Level",
               "Master Degree Level",
               "Graduate Diploma",
               "Graduate Certificate",
               "Bachelor Degree Level")
    ),
    Brazil = list(
      column = "Q22.2",
      Low = c("Creche, Pré-escolar (Maternal e Jardim de Infância), Classe de alfabetização - CA",
              "Alfabetização de Jovens e Adultos",
              "Antigo Primário (Elementar)",
              "Antigo Ginásio (Médio 1º Ciclo)",
              "Regular do Ensino Fundamental ou 1º Grau (da 1aa 3a série/ do 1º ao 4º ano)",
              "Regular do Ensino Fundamental ou 1º Grau (da 4a série/5º ano)",
              "Regular do Ensino Fundamental ou 1ºGrau (da 5aa 8asérie/ do 6º ao 9º ano)",
              "Supletivo do Ensino Fundamental ou do 1ºGrau"),
      Medium = c("Antigo Científico, Clássico, etc. (Médio 2o ciclo)",
                 "Regular ou Supletivo do Ensino Médio ou do 2º Grau"),
      High = c("Superior de Graduação",
               "Especialização de Nível Superior (mínimo de 360 horas)",
               "Mestrado",
               "Doutorado")
    ),
    Canada = list(
      column = "Q22.2",
      Low = c("No certificate, diploma or degree"),
      Medium = c("Secondary (high) school diploma or equivalency certificate",
                 "Trades certificate or diploma other than Certificate of Apprenticeship or Certificate of Qualification",
                 "Certificate of Apprenticeship or Certificate of Qualification",
                 "Program of 3 months or less than 1 year (College, CEGEP or other non-university certificate or diploma from a program of 3 months to less than 1 year)",
                 "Program of 1 to 2 years (College, CEGEP or other non-university certificate or diploma from a program of 1 year to 2 years)",
                 "Program of more than 2 years (College, CEGEP or other non-university certificate or diploma from a program of more than 2 years)",
                 "University certificate or diploma below bachelor level"),
      High = c("Bachelor's degree",
               "University certificate or diploma above bachelor level",
               # Correct spelling added; the misspelt "ceterinary" variant is
               # kept in case some exports really contain it.
               "Degree in medicine, dentistry, veterinary medicine or optometry",
               "Degree in medicine, dentistry, ceterinary medicine or optometry",
               "Master's degree",
               "Earned doctorate")
    ),
    Chile = list(
      column = "Q22.4",
      # Rows with no answer in Q22.4 take their answer from Q22.3.
      fallback = "Q22.3",
      Low = c("Especial o Diferencial",
              "Educación Básica",
              "Primaria o Preparatoria (Sistema antiguo)",
              "Nunca asistió"),
      Medium = c("Científico-Humanista",
                 "Técnica Profesional",
                 "Humanidades (Sistema antiguo)",
                 "Técnico Nivel Superior (carreras 1-3 años)"),
      # NOTE(review): "carreras 1-4 años" looks unusual for a professional
      # degree -- confirm against the questionnaire wording.
      High = c("Profesional (carreras 1-4 años)",
               "Magíster",
               "Doctorado")
    ),
    Colombia = list(
      column = "Q22.3",
      Low = c("Ninguno",
              "Preescolar",
              "Básica primaria (1.°-5.°)",
              "Básica secundaria (Bachillerato básico, 6.°-9.°)"),
      Medium = c("Media académica o clásica (Bachillerato clásico, 10.°-13.°)",
                 "Media técnica (Bachillerato técnico)",
                 "Normalista",
                 "Técnica Profesional",
                 "Tecnológica"),
      High = c("Universitario",
               "Especialización",
               "Maestría",
               "Doctorado")
    ),
    France = list(
      column = "Q22.2",
      Low = c("Pas de scolarité ou arrêt avant la fin du primaire",
              "Aucun diplôme et scolarité interrompue à la fin du primaire ou avant la fin du collège",
              "CEP (certificat d’études primaires)"),
      Medium = c("Aucun diplôme et scolarité jusqu’à la fin du collège ou au-delà",
                 "BEPC, brevet élémentaire, brevet des collèges, DNB",
                 "CAP, BEP ou diplôme de niveau équivalent"),
      High = c("Baccalauréat général ou technologique, brevet supérieur, capacité en droit, DAEU, ESEU",
               "Baccalauréat professionnel, brevet professionnel, de technicien ou d’enseignement, diplôme équivalent",
               "BTS, DUT, Deug, Deust, diplôme de la santé ou du social de niveau bac+2, diplôme équivalent",
               "Licence, licence pro, maîtrise, diplôme équivalent de niveau bac+3 ou bac+4",
               "Master, DEA, DESS, diplôme grande école niveau bac+5, doctorat de santé",
               "Doctorat de recherche (hors santé)")
    ),
    Italy = list(
      column = "Q22.2",
      Low = c("Nessuna istruzione formale",
              "Qualifica scuola elementare (o equivalente)",
              "Qualifica scuola media"),
      Medium = c("Attestato di qualificazione professionale che non permette accesso all'università (2-3 anni) / Attestato di qualifica professionale (operatore)",
                 "Diploma di istruzione secondaria di II grado di 5 anni/ Istituto Formazione Tecnico Superiore (dal 2000) / Istituto Tecnico Superiore (2 anni)"),
      High = c("Diploma di Accademia (Belle Arti, Dramma Nazionale, Danza Nazionale), Conservatorio di Musica, Istituto di Musica",
               "Laurea vecchio ordinamento",
               "Laurea triennale (3 anni)",
               "Laurea magistrale / Master (2 anni)",
               "Laurea vecchio ordinamento (4-6 anni) / Laurea a ciclo unico (5-6anni)",
               "Dottorato di ricerca")
    ),
    Spain = list(
      column = "Q22.2",
      Low = c("No sabe leer o escribir",
              "Sabe leer y escribir pero fue menos de 5 años a la escuela",
              "Fue a la escuela 5 o más años pero no llegó al último curso de ESO, EGB o Bachiller Elemental",
              "Llegó al último curso de ESO, EGB o Bachiller Elemental o tiene el Certificado de Escolaridad o de Estudios Primarios"),
      Medium = c("Bachiller (LOE, LOGSE), BUP, Bachiller Superior, COU, PREU",
                 "FP grado medio, FP I, Oficialía Industrial o equivalente, Grado Medio de Música y Danza, Certificados de Escuelas Oficiales de Idiomas",
                 "FP grado superior, FP II, Maestría industrial o equivalente"),
      High = c("Diplomatura universitaria, Arquitectura Técnica, Ingeniería Técnica o equivalente",
               "Grado Universitario o equivalente",
               "Licenciatura, Arquitectura, Ingeniería o equivalente",
               "Master oficial universitario (a partir de 2006), Especialidades Médicas o análogos",
               "Doctorado")
    ),
    UK = list(
      column = "Q22.2",
      Low = c("No formal qualifications",
              "Youth training certificate/skillseekers",
              "Recognised trade apprenticeship completed",
              "Clerical and commercial",
              "City & Guilds certificate",
              "City & Guilds certificate - advanced",
              "ONC",
              "CSE grades 2-5",
              "CSE grade 1, GCE O level, GCSE, School Certificate",
              "Scottish Ordinary/ Lower Certificate"),
      Medium = c("GCE A level or Higher Certificate",
                 "Scottish Higher Certificate",
                 "Nursing qualification (e.g. SEN, SRN, SCM, RGN)",
                 "Teaching qualification (not degree)",
                 "Other technical, professional or higher qualification"),
      High = c("University diploma",
               "University or CNAA first degree (e.g. BA, B.Sc, B.Ed)",
               "University or CNAA higher degree (e.g. M.Sc, Ph.D)")
    ),
    US = list(
      column = "Q22.2",
      Low = c("Nursery to 8th Grade",
              "Some high school",
              "None"),
      Medium = c("High school graduate, diploma (or equivalent)",
                 "Some college education, no degree",
                 "Training/vocational college",
                 "Associate degree"),
      High = c("Bachelor's degree",
               "Master's degree (including professional degrees, or equivalent)",
               "Doctorate degree")
    ),
    India = list(
      column = "Q22.2",
      Low = c("No formal education",
              "Incomplete primary school",
              "Completed primary school",
              "Middle pass / Matric fail"),
      Medium = c("Matric pass / 10th pass",
                 "11th pass, not completed intermediate",
                 "12th pass / Intermediate"),
      High = c("Undergraduate - Bachelor's / Diploma",
               "Postgraduate degree - Masters / PhD")
    ),
    Uganda = list(
      column = "Q22.2",
      Low = c("Never been to school",
              "Did not complete Pre-Primary",
              "Completed Pre-Primary",
              "In P1 but did not complete / attend Pre-primary",
              "Did not complete P1",
              "P1", "P2", "P3", "P4", "P5", "P6", "P7", "J1-J3"),
      Medium = c("S1", "S2", "S3", "S4", "S5", "S6",
                 "Professional Certificate", "Diploma"),
      High = c("First Degree",
               "Post Graduate Certificate",
               "Post Graduate Diploma",
               "Masters Degree",
               "PhD")
    ),
    China = list(
      column = "Q22.2",
      # Chinese answers are classified by their first character only.
      first_char = TRUE,
      Low = c("从", "小", "初"),
      Medium = c("高", "职", "中"),
      High = c("本", "研")
    )
  )

  scheme <- schemes[[country]]
  if (is.null(scheme)) {
    # Unsupported country: leave the data untouched.
    return(dataframe)
  }

  names(dataframe)[names(dataframe) == scheme[["column"]]] <- "education"
  if (!"education" %in% names(dataframe)) {
    stop("Column `", scheme[["column"]], "` not found for country '",
         country, "'.", call. = FALSE)
  }
  answers <- as.character(dataframe[["education"]])

  fallback <- scheme[["fallback"]]
  if (!is.null(fallback)) {
    if (is.null(dataframe[[fallback]])) {
      stop("Column `", fallback, "` not found for country '",
           country, "'.", call. = FALSE)
    }
    unanswered <- is.na(answers)
    answers[unanswered] <- as.character(dataframe[[fallback]])[unanswered]
  }

  if (isTRUE(scheme[["first_char"]])) {
    answers <- substr(answers, 1L, 1L)
  }

  # Assign in reverse priority so Low wins over Medium, and Medium over High,
  # should an answer ever appear in more than one list.
  level <- rep("NA", length(answers))
  level[answers %in% scheme[["High"]]] <- "High"
  level[answers %in% scheme[["Medium"]]] <- "Medium"
  level[answers %in% scheme[["Low"]]] <- "Low"

  dataframe[["education"]] <- level
  dataframe
}



#Age

# Replace the numeric `age` column with an age-band label.
#
# Chile uses the bands 18-25, 26-35, 36-45, 46-55 and 56+; every other
# country uses 18-29, 30-39, 40-49, 50-59 and 60-99. The country is taken
# from the FIRST value of `dataframe$country`, so the function expects one
# country per data frame. A missing or absent country gets the default bands.
#
# Arguments:
#   dataframe  A data frame (or tibble) with an `age` column and, normally, a
#              `country` column.
#
# Returns:
#   `dataframe` with `age` replaced by a character band label. Ages falling in
#   no band (for example 17 and below) become the string "NA"; missing ages
#   stay a real NA.
#
# Errors when the `age` column is absent.
fn.age <- function(dataframe) {
  if (is.null(dataframe[["age"]])) {
    stop("`dataframe` must contain an `age` column.", call. = FALSE)
  }

  # `if` needs a single TRUE/FALSE; comparing the whole column is an error in
  # R >= 4.2 as soon as the data frame has more than one row.
  country <- as.character(dataframe[["country"]])[1]
  age <- dataframe[["age"]]

  band <- rep("NA", length(age))
  if (identical(country, "Chile")) {
    band[which(age > 17 & age <= 25)] <- "18-25"
    band[which(age >= 26 & age <= 35)] <- "26-35"
    band[which(age >= 36 & age <= 45)] <- "36-45"
    band[which(age >= 46 & age <= 55)] <- "46-55"
    band[which(age >= 56)] <- "56+"
  } else {
    band[which(age > 17 & age <= 29)] <- "18-29"
    band[which(age >= 30 & age <= 39)] <- "30-39"
    band[which(age >= 40 & age <= 49)] <- "40-49"
    band[which(age >= 50 & age <= 59)] <- "50-59"
    band[which(age >= 60)] <- "60-99"
  }
  # A missing age has no band at all, as opposed to an out-of-range one.
  band[is.na(age)] <- NA_character_

  dataframe[["age"]] <- band
  dataframe
}



#Regions
# Collapse the detailed `REGION_0` values into a few broad regions per country.
#
# The country is taken from the FIRST value of `dataframe$country`; every row
# is then recoded with that single country's grouping, so the function expects
# one country per data frame.
#
# Arguments:
#   dataframe  A data frame (or tibble) with `country` and `REGION_0` columns.
#
# Returns:
#   `dataframe` with `REGION_0` replaced by the broad-region label. Values in
#   none of the country's groups (including missing values) become the string
#   "NA" -- a literal string, not a missing value. The data frame is returned
#   unchanged when it has no rows, when the first country is missing, or when
#   the country has no grouping (this includes Uganda, which is deliberately
#   left as is).
#
# Errors when `country` is absent, or when `REGION_0` is absent for a country
# that has a grouping.
fn.regions <- function(dataframe) {
  if (is.null(dataframe[["country"]])) {
    stop("`dataframe` must contain a `country` column.", call. = FALSE)
  }

  # `if` needs a single TRUE/FALSE; comparing the whole column is an error in
  # R >= 4.2 as soon as the data frame has more than one row.
  country <- as.character(dataframe[["country"]])[1]
  if (is.na(country)) {
    return(dataframe)
  }

  # Per country: output label -> raw REGION_0 values. The order of the labels
  # is the matching priority (first match wins).
  region_groups <- list(
    Australia = list(
      "West+North" = c("Western Australia", "Northern Territory"),
      "Queen+South" = c("Queensland", "South Australia"),
      "NSW+AUCapT" = c("New South Wales", "Australian Capital Territory"),
      "Vict+Tasm" = c("Victoria", "Tasmania")
    ),
    Brazil = list(
      "North" = c("Rondônia", "Acre", "Amazonas", "Roraima", "Pará", "Amapá",
                  "Tocantins"),
      "North East" = c("Maranhão", "Piauí", "Ceará", "Rio Grande Do Norte",
                       "Paraíba", "Pernambuco", "Alagoas", "Sergipe", "Bahia"),
      "Central" = c("Mato Grosso Do Sul", "Mato Grosso", "Goiás",
                    "Distrito Federal"),
      "South" = c("Paraná", "Santa Catarina", "Rio Grande Do Sul"),
      "South East" = c("Minas Gerais", "Espírito Santo", "Rio De Janeiro",
                       "São Paulo")
    ),
    Canada = list(
      "Atlantic Canada" = c("New Brunswick / Nouveau-Brunswick",
                            "Nova Scotia / Nouvelle-Écosse",
                            "Prince Edward Island / Île-du-Prince-Édouard",
                            "Newfoundland and Labrador / Terre-Neuve-et-Labrador"),
      "British Columbia" = c("British Columbia / Colombie-Britannique", "Yukon"),
      "Prairies" = c("Alberta", "Manitoba", "Saskatchewan",
                     "Northwest Territories", "Nunavut"),
      "Ontario" = c("Ontario"),
      "Quebec" = c("Quebec / Québec")
    ),
    Chile = list(
      "North" = c("Arica y Parinacota", "Antofagasta",
                  "Atacama", "Coquimbo", "Tarapacá"),
      "Center" = c("Metropolitana de Santiago", "Valparaíso", "Biobío",
                   "Maule", "Libertador General Bernardo O'Higgins"),
      "South" = c("Aysén del General Carlos Ibáñez del Campo",
                  "Los Lagos", "La Araucanía", "Ñuble", "Los Ríos",
                  "Magallanes y de la Antártica Chilena")
    ),
    China = list(
      "North" = c("天津市", "内蒙古自治区", "山西省", "河北省", "北京市"),
      "East" = c("上海市", "吉林省", "安徽省", "山东省", "江苏省", "江西省",
                 "浙江省", "福建省", "辽宁省", "黑龙江省"),
      "South Central" = c("广东省", "广西壮族自治区", "河南省",
                          "海南省", "湖北省", "湖南省", "澳门特别行政区",
                          "香港特别行政区"),
      "West" = c("云南省", "四川省", "宁夏回族自治区", "甘肃省", "贵州省",
                 "重庆市", "陕西省", "青海省", "西藏自治区", "新疆维吾尔自治区")
    ),
    Colombia = list(
      "Andina" = c("Antioquia", "Boyacá", "Caldas", "Cundinamarca",
                   "Huila", "Norte de Santander", "Putumayo", "Quindío",
                   "Risaralda", "Santander", "Tolima", "Bogotá D.C."),
      "Caribe" = c("Sucre", "Atlántico", "Bolívar", "Córdoba", "La Guajira",
                   "Magdalena", "Cesar",
                   "Archipiélago de San Andrés, Providencia y Santa Catalina"),
      "Pacifico" = c("Chocó", "Valle del Cauca", "Nariño", "Cauca"),
      "Orinaquia" = c("Arauca", "Casanare", "Meta", "Vichada"),
      "Amazonia" = c("Caquetá", "Amazonas", "Guainía", "Guaviare", "Vaupés")
    ),
    France = list(
      "Ile de France" = c("Île-de-France"),
      "North West" = c("Centre-Val de Loire", "Normandie", "Hauts-de-France",
                       "Pays de la Loire", "Bretagne"),
      "North East" = c("Bourgogne-Franche-Comté", "Grand Est"),
      "South West" = c("Nouvelle-Aquitaine"),
      "South East" = c("Corse", "Occitanie", "Auvergne-Rhône-Alpes",
                       "Provence-Alpes-Côte d'Azur")
    ),
    # NOTE(review): several Indian names use unusual spellings ("Harayana",
    # "Lakshwadeep", "Chattisgarh") and the list contains "India" itself --
    # confirm these match the values actually present in the data.
    India = list(
      "North" = c("Jammu & Kashmir", "Himachal Pradesh", "Punjab", "Chandigarh",
                  "Uttarakhand", "Harayana", "NCT of Delhi", "Uttar Pradesh"),
      "East+NorthEast" = c("Bihar", "Sikkim", "Nagaland", "Arunachal Pradesh",
                           "Manipur", "Mizoram", "Tripura", "Meghalaya",
                           "Assam", "West Bengal", "Jharkhand", "Odisha"),
      "South" = c("Andhra Pradesh", "Karnataka", "Lakshwadeep", "Kerala",
                  "Tamil Nadu", "Puducherry"),
      "Central+West" = c("India", "Rajasthan", "Chattisgarh", "Madhya Pradesh",
                         "Gujarat", "Daman and Diu", "Dadra and Nagar Haveli",
                         "Maharashtra", "Goa")
    ),
    Italy = list(
      "Center" = c("Toscana", "Umbria", "Marche", "Lazio"),
      "North West" = c("Piemonte", "Valle d'Aosta", "Lombardia", "Liguria"),
      "North East" = c("Trentino-Alto Adige", "Veneto", "Friuli Venezia Giulia",
                       "Emilia Romagna"),
      "South+Islands" = c("Abruzzo", "Molise", "Campania", "Puglia",
                          "Basilicata", "Calabria", "Sicilia", "Sardegna")
    ),
    Spain = list(
      "North" = c("Galicia", "Principado de Asturias", "Cantabria",
                  "País Vasco", "Comunidad Foral de Navarra",
                  # Correct spelling added; the mis-encoded variant is kept in
                  # case some exports really contain it.
                  "Castilla y León", "Castilla y LeÃ³n",
                  "La Rioja"),
      "Center" = c("Aragón", "Comunidad de Madrid", "Extremadura",
                   "Castilla-La Mancha"),
      "East_Islands" = c("Cataluña", "Comunitat Valenciana", "Canarias",
                         "Islas Baleares"),
      "South" = c("Andalucía", "Región de Murcia", "Ceuta", "Melilla")
    ),
    UK = list(
      "NorthENG+Scot+NorthI" = c("Scotland", "North East (England)",
                                 "North West (England)",
                                 "Yorkshire and The Humber", "Northern Ireland"),
      "SouthENG+Wales" = c("South East (England)", "South West (England)",
                           "Wales"),
      "London+Midland+East" = c("London", "East Midlands (England)",
                                "East of England", "West Midlands (England)")
    ),
    US = list(
      "Midwest" = c("Illinois", "Indiana", "Michigan", "Ohio", "Wisconsin",
                    "Iowa", "Kansas", "Minnesota", "Missouri", "Nebraska",
                    "North Dakota", "South Dakota"),
      "West" = c("Arizona", "Colorado", "Idaho", "Montana", "Nevada",
                 "New Mexico", "Utah", "Wyoming", "Alaska", "California",
                 "Hawaii", "Oregon", "Washington"),
      "Northeast" = c("Connecticut", "Maine", "Massachusetts", "New Hampshire",
                      "Rhode Island", "Vermont", "New Jersey", "New York",
                      "Pennsylvania"),
      "South" = c("Delaware", "District of Columbia", "Florida", "Georgia",
                  "Maryland", "North Carolina", "South Carolina", "Virginia",
                  "West Virginia", "Alabama", "Kentucky", "Mississippi",
                  "Tennessee", "Arkansas", "Louisiana", "Oklahoma", "Texas")
    )
    # Uganda has no entry on purpose: its regions are returned as they are.
  )

  groups <- region_groups[[country]]
  if (is.null(groups)) {
    return(dataframe)
  }

  if (is.null(dataframe[["REGION_0"]])) {
    stop("`dataframe` must contain a `REGION_0` column.", call. = FALSE)
  }
  raw_region <- as.character(dataframe[["REGION_0"]])

  # Walk the groups backwards so that an earlier group overwrites a later one,
  # i.e. the first matching group wins.
  broad_region <- rep("NA", length(raw_region))
  for (label in rev(names(groups))) {
    broad_region[raw_region %in% groups[[label]]] <- label
  }

  dataframe[["REGION_0"]] <- broad_region
  dataframe
}


  

#Gender
# Translate the `gender` answers of the different survey languages into
# English "Male" / "Female".
#
# Arguments:
#   dataframe  A data frame (or tibble) with a `gender` column.
#
# Returns:
#   `dataframe` with `gender` replaced by "Male" or "Female". Any value that
#   is not one of the labels listed below (including missing values) becomes
#   NA. The result is returned visibly, like the other recoding functions in
#   this file.
#
# Errors when the `gender` column is absent.
fn.gender <- function(dataframe) {
  if (is.null(dataframe[["gender"]])) {
    stop("`dataframe` must contain a `gender` column.", call. = FALSE)
  }

  male_labels <- c("Male", "Masculino", "男", "Homme", "Maschio")
  female_labels <- c("Female", "Feminino", "Femenino", "女", "Femme", "Femmina")

  answers <- as.character(dataframe[["gender"]])
  recoded <- rep(NA_character_, length(answers))
  recoded[answers %in% male_labels] <- "Male"
  recoded[answers %in% female_labels] <- "Female"

  dataframe[["gender"]] <- recoded
  dataframe
}
